* Disappointed Expectations: Downward Mobility and Electoral Change
* Thomas Kurer and Briitta van Staalduinen
* American Political Science Review

* Create Main Data Set
* SOEPcore v35 (2019)
* auxiliary SOEP files: pgen, bioparen, hbrutto, biosoc, ppfad

* Folder locations used throughout the script. Replace the placeholders
* with local paths before running.
global soepv35 "..insert path.."
global output "..insert path.."
global savedata "..insert path.."


************************************************************************************
* 2018
************************************************************************************

* Load bip.dta, replacing whatever is currently in memory, order the rows by
* person and survey year, and list the survey years as a quick check.
use "$soepv35/bip.dta", clear
sort persnr syear
tabulate syear

* add pgen data
* One-to-one match on persnr; rows that exist only in bipgen are discarded,
* rows that exist only in memory are kept.
merge 1:1 persnr using "$soepv35/bipgen.dta"
tabulate _merge
keep if _merge != 2
drop _merge

* add bioparen data
* One-to-one match on persnr; only persons present in both files are kept.
merge 1:1 persnr using "$soepv35/bioparen.dta"
tabulate _merge
keep if _merge == 3
drop _merge

* Merge with Bundesland (bibula) from HBRUTTO
* NOTE(review): this is still an m:m merge on hhnr, which pairs rows by their
* position within each hhnr group. If bihbrutto holds one row per household,
* the merge should be m:1 on whichever household identifier is unique in that
* file -- confirm the key before changing it.
merge m:m hhnr using "$soepv35/bihbrutto.dta", keepusing(bibula)
drop if _merge != 3 /* have household nr but no personal file */
drop _merge

* Merge with relation to HH head (d1110518) from pequiv
* This variable describes a person, so it is matched on persnr. The earlier
* m:m merge on hhnr paired members of the same household by row position and
* could attach another member's value or duplicate rows.
* m:1 rather than 1:1 is used because the m:m merge above does not guarantee
* that persnr is still unique in memory. Stata stops with an error if persnr
* is not unique in bipequiv.
* Assumes bipequiv.dta carries persnr -- TODO confirm.
merge m:1 persnr using "$soepv35/bipequiv.dta", keepusing(d1110518)
drop if _merge != 3
drop _merge

* The three merges below add person-level variables keyed on persnr.
* They are declared m:1 instead of m:m: when the using file has one row per
* persnr the result is the same, and when it does not Stata stops with an
* error instead of silently pairing rows by position.
* In each case rows that exist only in the using file are discarded.

* Merge with school experience in childhood from biosoc
merge m:1 persnr using "$soepv35/biosoc.dta", keepusing(bsschla bselkuem bsntdeut bsntmath bsntfmd1)
drop if _merge == 2
drop _merge

* Merge with birthregion and location in 1989 from ppath
merge m:1 persnr using "$soepv35/ppfad.dta", keepusing(birthregion loc1989 locinfo migback)
drop if _merge == 2
drop _merge

* Merge with the by-prefixed variables from bioage17
merge m:1 persnr using "$soepv35/bioage17.dta", keepusing(byisei bywaberf bywaarbp bybwunja byzbinf byzblas byzbrau byntdeut byntmath byntfmd1)
drop if _merge == 2
drop _merge

* drop flucht samples (sample1 codes 30, 31 and 34)

drop if inlist(sample1, 30, 31, 34)

* renaming and recoding

* hhnr is renamed first, presumably so that stripping the bi prefix from a
* bi-prefixed household number cannot collide with it -- verify.
rename hhnr hhnr_orig
* strip the leading "bi" from every variable that has it
rename bi* *
* the prefix strip also turns birthregion into rthregion; restore the name
rename rthregion birthregion

generate relhh = d1110518

* childhood

* Location items: values below 1 are treated as missing.
generate childloc = locchildh if !(locchildh < 1)
generate sameloc = locchild1 if !(locchild1 < 1)

* living1 to living8 are copied, in this order, to descriptive names;
* negative values are treated as missing.
local yearvars yearparents yearsinglem yearmwp yearsinglep yearfwp yearrelatives yearfoster yearprotectory
forvalues k = 1/8 {
    local target : word `k' of `yearvars'
    generate `target' = living`k' if !(living`k' < 0)
}

* Negative codes become missing in every bs-prefixed variable, in bula and
* in birthregion.
foreach v of varlist bs* {
    replace `v' = . if `v' < 0
}

foreach v of varlist bula birthregion {
    replace `v' = . if `v' < 0
}

* Residence in 1989: loc1989 codes 1 to 3 are taken over as they are;
* locinfo equal to 0 overrides them with code 4.
generate region1989 = loc1989 if inlist(loc1989, 1, 2, 3)
replace region1989 = 4 if locinfo == 0

label variable region1989   "residence in 1989" 
label define region1989 1 "east" 2 "west" 3 "abroad" 4 "born after 1989"
label values region1989 region1989

* Land of schooling, recoded from bsschla. Every rule below tests the
* original bsschla value, so the rules do not interact.
generate bulaschool2 = bsschla
replace bulaschool2 = . if bsschla < 0
replace bulaschool2 = . if bsschla == 98 /* germany before 1949, no region declared */
replace bulaschool2 = 11 if inlist(bsschla, 0, 18) /* various berlins */
* harmonize laender between variables
replace bulaschool2 = 12 if bsschla == 13
replace bulaschool2 = 13 if bsschla == 12
replace bulaschool2 = 15 if bsschla == 14
replace bulaschool2 = 16 if bsschla == 15
replace bulaschool2 = 14 if bsschla == 16

generate bula_child = bulaschool2

tabulate bula_child birthregion

* bula_youth: land of schooling, falling back to birthregion where the
* former is missing
generate bula_youth = cond(bula_child == ., birthregion, bula_child)

* grade

* Each grade starts from the biosoc item. Negative codes and code 7
* (did not have this subject) are treated as missing.
local subjects dt math flan
local items deut math fmd1
forvalues k = 1/3 {
    local subj : word `k' of `subjects'
    local item : word `k' of `items'
    generate grade_`subj' = bsnt`item' if !(bsnt`item' < 0) & bsnt`item' != 7
}

* complete with recent grades from bioage17 data
* (negative codes in the bioage17 items are blanked first)
forvalues k = 1/3 {
    local subj : word `k' of `subjects'
    local item : word `k' of `items'
    replace bynt`item' = . if bynt`item' < 0
    replace grade_`subj' = bynt`item' if grade_`subj' == .
}


* row-wise mean over whichever of the three grades are available
egen grades = rowmean(grade_dt grade_math grade_flan)

* bioage17 vars

* negative codes become missing
foreach v of varlist byisei bywaberf bywaarbp bybwunja byzbinf byzblas byzbrau {
    replace `v' = . if `v' < 0
}

* code 3 of bybwunja is folded into 0
replace bybwunja = 0 if bybwunja == 3

* reverse the three byzb items (x becomes 5 - x) and detach their value labels
foreach v of varlist byzbinf byzblas byzbrau {
    replace `v' = 5 - `v'
    label values `v'
}



* income

generate incomem = p_119_03 if !(p_119_03 < 0)


* Strip the _18 suffix from the respondent's ISEI and ISCO variables.
* existing without suffix: fisei88, misei88, fisei08, misei08, fisco88, misco88
rename isei88_18 isei88
rename isei08_18 isei08
rename isco88_18 isco88
rename isco08_18 isco08

* isei88 vs 08 corr only 0.8. new isco scheme.

* negative codes become missing
foreach v of varlist isei88 misei88 fisei88 isei08 fisei08 misei08 isco88 isco08 fisco88 misco88 {
    replace `v' = . if `v' < 0
}

* crosswalk: where the 88 ISEI score is missing, fill it from the crosswalk
* files keyed on the 08 ISCO code. The *_cw variables are expected to come
* from those files. Rows found only in a crosswalk file are discarded.

* respondent
generate p_isco08 = isco08

merge m:1 p_isco08 using "$savedata/cw_isco08_isei88.dta"
tabulate syear if _merge == 3
tabulate _merge
keep if _merge != 2
drop _merge

generate isei88_full = cond(isei88 == ., isei88_cw, isei88)

* father
merge m:1 fisco08 using "$savedata/cw_fisco08_fisei88.dta"
tabulate syear if _merge == 3
tabulate _merge
keep if _merge != 2
drop _merge

generate fisei88_full = cond(fisei88 == ., fisei88_cw, fisei88)

* mother
merge m:1 misco08 using "$savedata/cw_misco08_misei88.dta"
tabulate syear if _merge == 3
tabulate _merge
keep if _merge != 2
drop _merge

generate misei88_full = cond(misei88 == ., misei88_cw, misei88)


* birth years (existing: fybirth mybirth); negative codes become missing
rename pbirthy ybirth
foreach v of varlist ybirth fybirth mybirth {
    replace `v' = . if `v' < 0
}


* ISCED: strip the _18 suffix and blank negative codes
rename isced97_18 isced97
rename isced11_18 isced11
replace isced97 = . if isced97 < 0
replace isced11 = . if isced11 < 0
* no equivalent isced edu var for parents.

* Respondent's education, built from the school degree (psbil) and
* overridden by the tertiary degree (pbbil02) where one is coded.
gen edu = psbil
replace edu=. if psbil<0
replace edu=. if psbil==7 /* noch kein abschluss */
replace edu=0 if psbil==6 | psbil==8 /* dropout or never went to school */
replace edu=. if psbil==5 /* anderer abschluss, not defined */
/*
1 = hauptschule
2 = realschule
3= fachhochschulreife
4= abitur
*/
replace edu=5 if pbbil02==1 | pbbil02==4 /* fachhochschule */
replace edu=6 if pbbil02==2 | pbbil02==3 | pbbil02==5 | pbbil02==6 | pbbil02==7 /* tertiary incl. phd */

* Diagnostic output only: mean and sd of income by education level.
* tabstat is used because the older "table ..., c()" syntax is rejected by
* Stata 17 and later, which would stop the do-file at this line.
tabstat incomem, by(edu) statistics(mean sd) nototal

* Parents' education: the same recode is applied to the father (f) and the
* mother (m), from school degree (fsedu, msedu) and vocational or tertiary
* degree (fprofedu, mprofedu).
foreach p in f m {
    gen `p'edu = `p'sedu
    replace `p'edu=. if `p'sedu<0
    replace `p'edu=. if `p'sedu==0 /* weiss nicht */
    replace `p'edu=0 if `p'sedu==6 /* no degree */
    replace `p'edu=. if `p'sedu==5 /* other, not defined */
    replace `p'edu=. if `p'sedu>6 /* migrant subsample, degrees in other countries */
    /*
    1 = hauptschule
    2 = realschule
    3= fachhochschulreife
    4= abitur
    */
    replace `p'edu=5 if inrange(`p'profedu, 27,30)
    replace `p'edu=6 if inlist(`p'profedu, 31,32)

    * n in 3 is too small. add fachhochschulreife to abitur
    * The three steps must run in this order so that each shifted code is
    * moved down exactly once.
    replace `p'edu = 3 if `p'edu==4
    replace `p'edu = 4 if `p'edu==5
    replace `p'edu = 5 if `p'edu==6
}

* German-nationality indicators for the respondent, father and mother.
* An indicator is 1 if the nationality code equals 1 and 0 for any other
* valid code. It is missing when the source code is negative or missing.
* Previously the respondent's indicator was 0 in those cases, unlike the
* parents' indicators, which already blanked out negative codes.
gen german = nation18 == 1 if nation18 >= 0 & !missing(nation18)
gen fgerman = fnat == 1 if fnat >= 0 & !missing(fnat)
gen mgerman = mnat == 1 if mnat >= 0 & !missing(mnat)

* EGP class: shorten the names, then blank negative codes
rename egp88_18 egp
rename megp88 megp
rename fegp88 fegp

rename egp08_18 egp08

foreach v of varlist egp fegp megp egp08 fegp08 megp08 {
    replace `v' = . if `v' < 0
}

* Employment Status etc.

* empstat is a plain copy of p_43; the three indicators flag codes 1, 2 and 9.
* NOTE(review): negative p_43 codes are not blanked here, so the indicators
* are 0 for them -- confirm this is intended.
generate empstat = p_43
generate fulltime = empstat == 1
generate parttime = empstat == 2
generate notemp = empstat == 9

* unemployment indicator; negative p_21 codes are treated as missing
generate unemp = p_21 == 1 if !(p_21 < 0)

* copies of the p_107 items, with negative codes treated as missing
local probvars prob_unemp prob_promo prob_demotion prob_giveupjob
local probitems p_107_02 p_107_03 p_107_07 p_107_05
forvalues k = 1/4 {
    local target : word `k' of `probvars'
    local source : word `k' of `probitems'
    generate `target' = `source' if !(`source' < 0)
}


* Dependent Variables: Political Behavior

* party id

* negative source codes are treated as missing
generate partyid_yn = p_172 == 1 if !(p_172 < 0)
generate partyid = p_173_01 if !(p_173_01 < 0)
generate partyid_int = p_174 if !(p_174 < 0)

* Party and party-bloc indicators. Each one is left missing where partyid
* is missing. The variables are created in the same order as before,
* because they are later addressed as the range afd-rr.
local known "if partyid != ."

generate afd = partyid == 27 `known'
generate spd = partyid == 1 `known'
generate cdu = partyid == 2 `known'
generate csu = partyid == 3 `known'
generate cducsu = inlist(partyid, 2, 3, 13) `known'
generate fdp = partyid == 4 `known'
generate gruen = partyid == 5 `known'
generate linke = partyid == 6 `known'

generate msr = inlist(partyid, 2, 3, 13, 4, 14) `known'
generate msl = inlist(partyid, 1, 5, 9) `known'
generate left = inlist(partyid, 1, 5, 9, 6, 16, 17) `known'
generate rr = inlist(partyid, 27, 7) `known'

* party vote choice (bundestagswahl 2017)

* negative codes and code 29 (not eligible) are treated as missing
generate partyvote = p_175 if !(p_175 < 0) & p_175 != 29

* party vote dummies
* novote in zero
* Each indicator is left missing where partyvote is missing. The variables
* are created in the same order as before, because they are later addressed
* as the range vafd-novote.
local voted "if partyvote != ."

generate vafd = partyvote == 27 `voted'
generate vspd = partyvote == 1 `voted'
generate vcdu = partyvote == 2 `voted'
generate vcsu = partyvote == 3 `voted'
generate vcducsu = inlist(partyvote, 2, 3, 13) `voted'
generate vfdp = partyvote == 4 `voted'
generate vgruen = partyvote == 5 `voted'
generate vlinke = partyvote == 6 `voted'

generate vmsr = inlist(partyvote, 2, 3, 13, 4, 14, 22) `voted'
generate vmsl = inlist(partyvote, 1, 5, 9) `voted'
generate vleft = inlist(partyvote, 1, 5, 9, 6, 16, 17) `voted'
generate vrr = inlist(partyvote, 27, 7, 30) `voted'

generate novote = partyvote == 28 `voted'

* political interest

* negative codes are treated as missing; the scale is reversed (x becomes 5 - x)
generate polintr = 5 - p_171 if !(p_171 < 0)

* satisfaction vars

* plain copies of the source items, with negative codes treated as missing
local satvars sathealth satsleep satwork sathhinc satinc satfamily satlivingstand satlifenow satlifein1y satlifein5y
local satitems p_01_01 p_01_02 p_01_03 p_01_05 p_01_06 p_01_10 p_01_11 p_202_01 p_202_02 p_202_03
forvalues k = 1/10 {
    local target : word `k' of `satvars'
    local source : word `k' of `satitems'
    generate `target' = `source' if !(`source' < 0)
}

* worries

* negative codes are treated as missing; the scale is reversed (x becomes 4 - x)
local worryvars worry_work worry_econsoc worry_econego worry_crime worry_soc worry_mig
local worryitems p_170_13 p_170_01 p_170_02 p_170_08 p_170_09 p_170_11
forvalues k = 1/6 {
    local target : word `k' of `worryvars'
    local source : word `k' of `worryitems'
    generate `target' = 4 - `source' if !(`source' < 0)
}

* keep rel vars

* female indicator: 1 if sex is 2; missing where sex is below 1 or missing
generate female = sex == 2 if sex >= 1 & sex != .

* Variables retained in the final data set. afd-rr and vafd-novote are
* positional ranges and rely on the order in which those variables were
* created.
* NOTE(review): no statement in this script creates a variable starting
* with ref_; presumably these come from the source files -- confirm.
keep persnr hhnr relhh syear ///
    bula region1989 birthregion bula_child bula_youth childloc sameloc ///
    grade* year* bs* female incomem ///
    *isei88 *isei88_full *isei08 *isco88 isco08 *ybirth isced* ///
    *edu psbil pbbil02 fsedu fprofedu msedu mprofedu ///
    *german migback *egp *egp08 ///
    empstat fulltime parttime notemp unemp prob_* ///
    partyid* afd-rr partyvote* vafd-novote polintr ///
    sat* worry_* ref_* ///
    byisei bywaberf bywaarbp bybwunja byzbinf byzblas byzbrau

* write the result as a Stata data set and as a comma-separated text file
save "$savedata/soep_pairs2018.dta", replace
outsheet using "$savedata/soep_pairs2018.csv", comma replace

